## Import data
# Load the monthly COVID table and normalise its column types:
#   * `month` is converted to a number, mapped to its English month name and
#     stored as an ordered factor in calendar order (January < ... < December);
#   * `year` becomes a character label so it can serve as a discrete colour;
#   * `Confirmed_of_Month` is forced to numeric.
covid <-
  "./data/covid_data_monthly.csv" %>%
  read_csv() %>%
  mutate(
    month = factor(
      month.name[as.numeric(month)],
      levels = month.name,
      ordered = TRUE
    ),
    year = as.character(year),
    Confirmed_of_Month = as.numeric(Confirmed_of_Month)
  )
## Rows: 20 Columns: 6
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): month
## dbl (5): year, Confirmed_of_Month, Deaths_of_Month, Recovered_of_Month, Acti...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Take a look at the covid plot: confirmed cases per month, one line per year
ggplot(
  data = covid,
  mapping = aes(x = month, y = Confirmed_of_Month, color = year)
) +
  geom_line(mapping = aes(group = year)) +
  geom_point(size = 1.5)

# Load the combined export volumes. The raw month labels contain the
# misspelling "Feburary"; it is corrected first, then `month` is stored as an
# ordered factor in calendar order and `year` as a character label.
export_volume_df <-
  "./data/cleaned data/export_volume_combined.csv" %>%
  read_csv() %>%
  mutate(
    month = recode(month, "Feburary" = "February"),
    month = factor(month, levels = month.name, ordered = TRUE),
    year = as.character(year)
  )
## Rows: 855 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): product_type, month
## dbl (2): year, export_volume
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Total export volume per month and year, drawn as one line per year.
# Month labels are rotated 90 degrees so the twelve names do not overlap.
rotated_x_labels <- element_text(angle = 90, vjust = 0.5, hjust = 1)

export_volume_df %>%
  group_by(month, year) %>%
  summarize(sum_of_export = sum(export_volume)) %>%
  ggplot(mapping = aes(x = month, y = sum_of_export, color = year)) +
  geom_line(mapping = aes(group = year)) +
  geom_point(size = 1.5) +
  theme(axis.text.x = rotated_x_labels)
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.

# Load the combined import volumes. The raw month labels contain the
# misspelling "Feburary"; it is corrected first, then `month` is stored as an
# ordered factor in calendar order and `year` as a character label.
import_volume_df <-
  "./data/cleaned data/import_volume_combined.csv" %>%
  read_csv() %>%
  mutate(
    month = recode(month, "Feburary" = "February"),
    month = factor(month, levels = month.name, ordered = TRUE),
    year = as.character(year)
  )
## Rows: 585 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): product_type, month
## dbl (2): year, import_volume
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Total import volume per month and year, drawn as one line per year.
# Month labels are rotated 90 degrees so the twelve names do not overlap.
rotated_x_labels <- element_text(angle = 90, vjust = 0.5, hjust = 1)

import_volume_df %>%
  group_by(month, year) %>%
  summarize(sum_of_import = sum(import_volume)) %>%
  ggplot(mapping = aes(x = month, y = sum_of_import, color = year)) +
  geom_line(mapping = aes(group = year)) +
  geom_point(size = 1.5) +
  theme(axis.text.x = rotated_x_labels)
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.

# Join the COVID table with the export and import volume tables on calendar
# year and month, then standardise column names to snake_case and give the two
# clashing `product_type` columns descriptive names.
#
# `left_join()` has no `all` argument (that belongs to `base::merge()`); it was
# only being forwarded through `...`, where it is ignored by older dplyr
# releases and rejected by newer ones, so it is dropped here. A left join
# already keeps every row of the left-hand table.
#
# NOTE(review): both volume tables hold several rows per year/month (one per
# product type), so this join pairs every export row with every import row of
# the same month. Row-level models fitted on `combined_df` therefore see each
# volume repeated many times -- confirm this is intended.
combined_df <-
  list(covid, export_volume_df, import_volume_df) %>%
  reduce(left_join, by = c("year", "month")) %>%
  janitor::clean_names() %>%
  rename(
    "export_product_type" = "product_type_x",
    "import_product_type" = "product_type_y"
  )
Export volume vs. COVID cases
# Simple linear regression of export volume on the monthly confirmed case
# count, fitted on the row-level joined data; print the coefficient table.
export_fit <- lm(
  formula = export_volume ~ confirmed_of_month,
  data = combined_df
)
summary(export_fit)
##
## Call:
## lm(formula = export_volume ~ confirmed_of_month, data = combined_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1885.0 -1684.6 -1548.5 -417.6 17170.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.930e+03 8.917e+01 21.641 <2e-16 ***
## confirmed_of_month -1.344e-07 1.230e-07 -1.093 0.274
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3489 on 4444 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.0002687, Adjusted R-squared: 4.376e-05
## F-statistic: 1.195 on 1 and 4444 DF, p-value: 0.2745
# Fit one export-volume regression per nested group and collect the tidied
# coefficient tables alongside each group's month and year.
#
# `nest()` packs the columns from `confirmed_of_month` through `export_volume`
# into a list-column; every column outside that range acts as a grouping key.
# NOTE(review): which columns end up as keys depends on the column order of
# `combined_df`, and `confirmed_of_month` presumably takes a single value per
# year/month, which would leave its slope inestimable (NA) -- confirm the
# intended grouping.
#
# `na.action` now sits inside `lm()`. Previously it was handed to `map()` as
# `na.omit()` (a call with no argument) and never reached the model.
export_df <-
  combined_df %>%
  na.omit() %>%
  nest(data = confirmed_of_month:export_volume) %>%
  mutate(
    lm_fits = map(
      .x = data,
      ~ lm(
        export_volume ~ confirmed_of_month,
        data = .x,
        na.action = na.omit
      )
    ),
    lm_results = map(lm_fits, broom::tidy)
  ) %>%
  select(month, year, lm_results) %>%
  unnest(lm_results)
# Density of the fitted coefficient estimates; rows containing NA are removed
# before plotting.
ggplot(data = na.omit(export_df), mapping = aes(x = estimate)) +
  geom_density()

# Coefficient estimates by month, one panel per model term, with the month
# labels rotated 90 degrees for legibility.
ggplot(data = export_df, mapping = aes(x = month, y = estimate)) +
  geom_point() +
  facet_grid(. ~ term) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Scatter plot of summed export volume against confirmed COVID cases, one
# point per (month, year, confirmed_of_month) group, with a dashed red
# least-squares line and no confidence band.
#
# `se = FALSE` replaces `se = F`: `F` is an ordinary variable that can be
# reassigned, `FALSE` cannot. The bare `scale_y_continuous()` call was dropped
# because it only re-declares the default scale.
export_plot <-
  combined_df %>%
  group_by(month, year, confirmed_of_month) %>%
  summarize(export_sum = sum(export_volume)) %>%
  ggplot(aes(x = confirmed_of_month, y = export_sum)) +
  geom_point(alpha = 0.5) +
  geom_smooth(
    se = FALSE,
    color = "red",
    method = "lm",
    size = 1,
    linetype = 2
  ) +
  labs(
    title = "Covid Cases vs. Export Volume",
    x = "Covid Cases",
    y = "Export Volume"
  )
## `summarise()` has grouped output by 'month', 'year'. You can override using the `.groups` argument.
# Interactive scatter of row-level export volume against confirmed cases,
# coloured by month, with a hover label showing both values.
#
# The hover label used to read "Confirmed Cases: $<n>"; a case count is not a
# currency amount, so the stray "$" is removed.
combined_df %>%
  mutate(
    text_label = str_c(
      "Confirmed Cases: ", confirmed_of_month,
      "\nExport Volume: ", export_volume
    )
  ) %>%
  plot_ly(
    x = ~confirmed_of_month,
    y = ~export_volume,
    type = "scatter",
    mode = "markers",
    color = ~month,
    text = ~text_label,
    alpha = 0.5
  )
Import volume vs. COVID cases
# Simple linear regression of import volume on the monthly confirmed case
# count, fitted on the row-level joined data; print the coefficient table.
import_fit <- lm(
  formula = import_volume ~ confirmed_of_month,
  data = combined_df
)
summary(import_fit)
##
## Call:
## lm(formula = import_volume ~ confirmed_of_month, data = combined_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -427.7 -313.1 -246.3 23.7 10390.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.413e+02 2.247e+01 19.645 <2e-16 ***
## confirmed_of_month -2.577e-08 3.098e-08 -0.832 0.406
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 878.9 on 4444 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.0001557, Adjusted R-squared: -6.929e-05
## F-statistic: 0.692 on 1 and 4444 DF, p-value: 0.4055
# Fit one import-volume regression per nested group and collect the tidied
# coefficient tables alongside each group's month and year.
#
# `nest()` packs the columns from `confirmed_of_month` through `import_volume`
# into a list-column; every column outside that range acts as a grouping key.
# NOTE(review): which columns end up as keys depends on the column order of
# `combined_df`, and `confirmed_of_month` presumably takes a single value per
# year/month, which would leave its slope inestimable (NA) -- confirm the
# intended grouping.
#
# `na.action` now sits inside `lm()`. Previously it was handed to `map()` as
# `na.omit()` (a call with no argument) and never reached the model.
import_df <-
  combined_df %>%
  na.omit() %>%
  nest(data = confirmed_of_month:import_volume) %>%
  mutate(
    lm_fits = map(
      .x = data,
      ~ lm(
        import_volume ~ confirmed_of_month,
        data = .x,
        na.action = na.omit
      )
    ),
    lm_results = map(lm_fits, broom::tidy)
  ) %>%
  select(month, year, lm_results) %>%
  unnest(lm_results)
# Density of the fitted coefficient estimates for the import models.
ggplot(data = import_df, mapping = aes(x = estimate)) +
  geom_density()

# Coefficient estimates by month, one panel per model term, with the month
# labels rotated 90 degrees for legibility.
ggplot(data = import_df, mapping = aes(x = month, y = estimate)) +
  geom_point() +
  facet_grid(. ~ term) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Scatter plot of summed import volume against confirmed COVID cases, one
# point per (month, year, confirmed_of_month) group, with a dashed red
# least-squares line and no confidence band.
#
# `se = FALSE` replaces `se = F`: `F` is an ordinary variable that can be
# reassigned, `FALSE` cannot. The bare `scale_y_continuous()` call was dropped
# because it only re-declares the default scale.
import_plot <-
  combined_df %>%
  group_by(month, year, confirmed_of_month) %>%
  summarize(import_sum = sum(import_volume)) %>%
  ggplot(aes(x = confirmed_of_month, y = import_sum)) +
  geom_point(alpha = 0.5) +
  geom_smooth(
    se = FALSE,
    color = "red",
    method = "lm",
    size = 1,
    linetype = 2
  ) +
  labs(
    title = "Covid Cases vs. Import Volume",
    x = "Covid Cases",
    y = "Import Volume"
  )
## `summarise()` has grouped output by 'month', 'year'. You can override using the `.groups` argument.
# Interactive scatter of summed import volume against confirmed cases, one
# marker per (month, year, confirmed_of_month) group, coloured by month, with
# a hover label showing both values.
#
# The hover label used to read "Confirmed Cases: $<n>"; a case count is not a
# currency amount, so the stray "$" is removed.
combined_df %>%
  group_by(month, year, confirmed_of_month) %>%
  summarize(import_sum = sum(import_volume)) %>%
  mutate(
    text_label = str_c(
      "Confirmed Cases: ", confirmed_of_month,
      "\nImport Volume: ", import_sum
    )
  ) %>%
  plot_ly(
    x = ~confirmed_of_month,
    y = ~import_sum,
    type = "scatter",
    mode = "markers",
    color = ~month,
    text = ~text_label,
    alpha = 0.5
  )
## `summarise()` has grouped output by 'month', 'year'. You can override using the `.groups` argument.
# Show the import and export scatter plots in a single arranged figure,
# import first.
ggpubr::ggarrange(plotlist = list(import_plot, export_plot))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
